با استفاده از داده های زلزله ها در ایران و جهان به سوالات زیر پاسخ دهید.


۱. با استفاده از داده های historical_web_data_26112015.rds و استفاده از نمودار پراکنش سه بعدی بسته plotly نمودار طول، عرض و عمق زلزله ها را رسم نمایید. علاوه بر آن بزرگی هر نقطه را برابر بزرگی زمین لرزه قرار دهید.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(tidyr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggmap)
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
equake = readRDS("data/historical_web_data_26112015.rds")
equake
##                      Time Magnitude Latitude Longitude Depth
##    1: 2015-11-26 11:06:37       2.5   37.923    46.871     6
##    2: 2015-11-26 10:14:05       2.5   32.757    47.653     5
##    3: 2015-11-26 09:04:29       1.7   38.451    44.952    10
##    4: 2015-11-26 08:23:10       2.4   36.748    54.561    10
##    5: 2015-11-26 08:14:56       1.7   36.134    58.731    18
##   ---                                                       
## 1996: 2015-09-09 16:23:23       2.6   32.714    49.894    10
## 1997: 2015-09-09 15:54:43       1.7   32.186    54.302     5
## 1998: 2015-09-09 14:55:31       1.7   33.384    49.708    10
## 1999: 2015-09-09 14:08:06       2.9   29.152    55.397    10
## 2000: 2015-09-09 13:51:09       1.8   31.866    55.676    10
##              Province          City
##    1: East Azarbaijan   Bostan abad
##    2:            Ilam       Murmuri
##    3: West Azarbaijan          Khoy
##    4:        Golestan        Gorgan
##    5: Khorasan Razavi    Neishaboor
##   ---                              
## 1996:         isfahan Fereidonshahr
## 1997:            Yazd      Ashkezar
## 1998:        Lorestan     Aligodarz
## 1999:          kerman   Najaf shahr
## 2000:            Yazd       Bahabad
note: I cunstruct the map and made a user name and pass on plotly like said on below but ay last when running the api create i got an error that as I search I can not find any solution for this in my computer . Error: lexical error: invalid char in json text.
                 (right here) ------^
Sys.setenv("plotly_username"="salehane")
Sys.setenv("plotly_api_key"="1EictJ86dl5qR090tOOd")

p <- plot_ly(equake, x = ~Latitude, y = ~Longitude, z = ~Depth,
        marker = list(color = ~Magnitude, colorscale = c('#FFE1A1', '#683531'), showscale = TRUE)) %>%
  add_markers() %>%
  layout(scene = list(xaxis = list(title = 'Latitude'),
                     yaxis = list(title = 'Longitude'),
                     zaxis = list(title = 'Depth')),
         annotations = list(
           x = 1.13,
           y = 1.05,
           text = 'Magnit',
           xref = 'paper',
           yref = 'paper',
           showarrow = FALSE
         ))

chart_link = api_create(p, filename="scatter3d-colorscale")
chart_link

۲. پویانمایی سونامی های تاریخی را بر حسب شدت بر روی نقشه زمین رسم نمایید.(از داده زلزله های بزرگ استفاده نمایید.)

disaster = read_delim("data/disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   FLAG_TSUNAMI = col_character(),
##   SECOND = col_character(),
##   EQ_PRIMARY = col_double(),
##   EQ_MAG_MW = col_double(),
##   EQ_MAG_MS = col_double(),
##   EQ_MAG_MB = col_character(),
##   EQ_MAG_ML = col_double(),
##   EQ_MAG_MFA = col_character(),
##   EQ_MAG_UNK = col_double(),
##   COUNTRY = col_character(),
##   STATE = col_character(),
##   LOCATION_NAME = col_character(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   MISSING = col_character(),
##   DAMAGE_MILLIONS_DOLLARS = col_character(),
##   TOTAL_MISSING = col_character(),
##   TOTAL_MISSING_DESCRIPTION = col_character(),
##   TOTAL_DAMAGE_MILLIONS_DOLLARS = col_character()
## )
## See spec(...) for full column specifications.
disaster %>% filter(YEAR>2000) %>% filter(FLAG_TSUNAMI=='Tsu') %>% 
rename(lat = LATITUDE,lon = LONGITUDE, z = DEATHS,name = COUNTRY,sequence = YEAR) %>% 
  dplyr::select(lat, lon, z, name, sequence) -> dis 
hcmap() %>% 
  hc_add_series(data = dis, type = "mapbubble",
                minSize = 0, maxSize = 30)  %>% 
  hc_plotOptions(series = list(showInLegend = FALSE))

۳. نمودار چگالی دو بعدی زلزله های تاریخی ایران را رسم کنید.( از داده iran_earthquake.rds و لایه stat_density_2d استفاده نمایید).

iequake = read_rds("data/iran_earthquake.rds")
iequake
##           NO          OriginTime    Lat    Long Depth Mag RMSRes
##     1:     1 2015-11-26 02:15:12 30.793  56.555    10 2.6    0.5
##     2:     2 2015-11-26 02:11:30 38.445  46.800    10 1.4    0.2
##     3:     3 2015-11-26 01:28:06 36.806  46.976    18 2.0    0.4
##     4:     4 2015-11-26 00:51:36 31.890  49.496    13 1.9    0.4
##     5:     5 2015-11-25 23:43:31 33.207  49.578    10 1.8    0.4
##    ---                                                          
## 96031: 95429 2006-01-01 11:28:46 35.552  53.246    10 1.7    0.1
## 96032: 95430 2006-01-01 07:38:03 33.936  48.699     4 3.4    0.5
## 96033: 95431 2006-01-01 05:59:48 35.270  61.964    14 2.2    0.0
## 96034: 95432 2006-01-01 03:08:08 33.962  48.661     6 3.3    0.4
## 96035: 95433 1970-01-01 22:58:42 38.695 141.818    52 5.0    0.0
##        NoofStations Noofphases
##     1:            9          1
##     2:            4          2
##     3:            6          3
##     4:            8          4
##     5:            9          4
##    ---                        
## 96031:            3          2
## 96032:           20         94
## 96033:            4          8
## 96034:           23         58
## 96035:            4        111
myMap = read_rds("data/Tehrn_map_6.rds")
ggmap(myMap)+
  stat_density_2d(data=iequake,aes(x = Long, y = Lat,color = Mag))+ scale_color_distiller(palette = "Spectral")
## Warning: Removed 243 rows containing non-finite values (stat_density2d).


۴. احتمال اینکه در ایران در پنج سال آینده زلزله به بزرگی هفت ریشتر رخ دهد را محاسبه کنید. (از احتمال شرطی استفاده کنید.)

I cunctruct gamma probebilty distibution on past data by fitdist function . as I plot gamma is a good probbeility distribution for this data . then by shape and rate I find probebility of acuring an earthquake in one year as p_gamma. (I use 6.5 to 7.5 as 7 rishter magnitute .)

for finding probebility of happening an earthqueke within 5 years I do this : 1-(1-p_gamma)^5 that means 1- probeility of not having an 7 risther earthqueke for 5 years .

library(fitdistrplus)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: survival
library(dplyr)
iequake = read_rds("data/iran_earthquake.rds")
emag = iequake$Mag[complete.cases(iequake$Mag)]

plotdist(emag,histo = TRUE, demp = TRUE)

### it seems that gamma is a good option for this probebility distribution
fit.gamma <- fitdist(emag ,distr = "gamma",'mme')
shape = fit.gamma$estimate[1]
rate = fit.gamma$estimate[2]
a <- pgamma(6.5, shape=shape, rate = rate, lower.tail = TRUE,
       log.p = FALSE)
b  <- pgamma(7.5, shape=shape, rate = rate, lower.tail = TRUE,
       log.p = FALSE)
p_gamma = b-a
1-(1-p_gamma)^5
## [1] 6.727548e-05

p = 6.727548e-05 !


۵. بر اساس داده های زلزله های بزرگ ابتدا تعداد و متوسط کشته زلزله ها را بر حسب کشور استخراج نمایید. سپس نمودار گرمایی تعداد کشته ها را بر روی کره زمین رسم نمایید.(مانند مثال زیر!)

disaster = read_delim("data/disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   FLAG_TSUNAMI = col_character(),
##   SECOND = col_character(),
##   EQ_PRIMARY = col_double(),
##   EQ_MAG_MW = col_double(),
##   EQ_MAG_MS = col_double(),
##   EQ_MAG_MB = col_character(),
##   EQ_MAG_ML = col_double(),
##   EQ_MAG_MFA = col_character(),
##   EQ_MAG_UNK = col_double(),
##   COUNTRY = col_character(),
##   STATE = col_character(),
##   LOCATION_NAME = col_character(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   MISSING = col_character(),
##   DAMAGE_MILLIONS_DOLLARS = col_character(),
##   TOTAL_MISSING = col_character(),
##   TOTAL_MISSING_DESCRIPTION = col_character(),
##   TOTAL_DAMAGE_MILLIONS_DOLLARS = col_character()
## )
## See spec(...) for full column specifications.
disaster %>%
  rename(lat = LATITUDE,lon = LONGITUDE, z = DEATHS,name = COUNTRY,sequence = YEAR) %>% 
  dplyr::select(lat, lon, z, name, sequence,TOTAL_DEATHS) -> dead_temp
### mean dead by each country
dead_temp %>% group_by(name) %>% summarise(con_death=mean(TOTAL_DEATHS,na.rm = T)) ->
  mean_deadbycon
hcmap(data=mean_deadbycon,value = "con_death",
      name = "heat earthq",
      dataLabels = list(enabled = TRUE, format = '{point.name}'),
      borderColor = "#FAFAFA", borderWidth = 0.1,tooltip = list(valueDecimals = 2))


۶. با استفاده از داده لرزه های بزرگ و به وسیله طول، عرض، شدت، عمق مدلی برای پیش بینی تعداد کشته های زلزله بیابید.

I fit a GLM model with link function poisson beacause we have nonnegetive and descrite death count .

disaster = read_delim("data/disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   FLAG_TSUNAMI = col_character(),
##   SECOND = col_character(),
##   EQ_PRIMARY = col_double(),
##   EQ_MAG_MW = col_double(),
##   EQ_MAG_MS = col_double(),
##   EQ_MAG_MB = col_character(),
##   EQ_MAG_ML = col_double(),
##   EQ_MAG_MFA = col_character(),
##   EQ_MAG_UNK = col_double(),
##   COUNTRY = col_character(),
##   STATE = col_character(),
##   LOCATION_NAME = col_character(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   MISSING = col_character(),
##   DAMAGE_MILLIONS_DOLLARS = col_character(),
##   TOTAL_MISSING = col_character(),
##   TOTAL_MISSING_DESCRIPTION = col_character(),
##   TOTAL_DAMAGE_MILLIONS_DOLLARS = col_character()
## )
## See spec(...) for full column specifications.
disaster %>% dplyr::select(FOCAL_DEPTH,LATITUDE,LONGITUDE,INTENSITY,TOTAL_DEATHS) -> temp_data
### remove nan data 
temp_data <- temp_data[complete.cases(temp_data),]
model = glm(formula = TOTAL_DEATHS~.,data = temp_data,family=poisson())
summary(model)
## 
## Call:
## glm(formula = TOTAL_DEATHS ~ ., family = poisson(), data = temp_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -351.74   -78.18   -40.68   -14.03   669.52  
## 
## Coefficients:
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)  3.570e-02  6.132e-03    5.821 5.84e-09 ***
## FOCAL_DEPTH -2.315e-02  5.590e-05 -414.115  < 2e-16 ***
## LATITUDE     4.671e-03  3.708e-05  125.962  < 2e-16 ***
## LONGITUDE    5.175e-03  1.010e-05  512.419  < 2e-16 ***
## INTENSITY    9.411e-01  6.180e-04 1522.916  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 10230920  on 429  degrees of freedom
## Residual deviance:  6423124  on 425  degrees of freedom
## AIC: 6425605
## 
## Number of Fisher Scoring iterations: 8

as we can see by p-value add all of each feature can add some information about dead count .

testing the accuracy by cross validation.

library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
## 
##     aml
cost <- function(labels,pred){
 mean(labels==ifelse(pred > 0.5, 1, 0))
}
glm_cv <- cv.glm(temp_data, model, K=10,cost = cost)
1-glm_cv$delta[1]
## [1] 0.8744186

as it can be seen our model accuracy is good on this data . ***

۷. با استفاده از داده worldwide.csv به چند سوال زیر پاسخ دهید. تحقیق کنید آیا می توان از پیش لرزه، زلزله اصلی را پیش بینی کرد؟

equake = read_csv("data/worldwide.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   time = col_datetime(format = ""),
##   magType = col_character(),
##   nst = col_integer(),
##   net = col_character(),
##   id = col_character(),
##   updated = col_datetime(format = ""),
##   place = col_character(),
##   type = col_character(),
##   magNst = col_integer(),
##   status = col_character(),
##   locationSource = col_character(),
##   magSource = col_character()
## )
## See spec(...) for full column specifications.
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
equake$country = sub(".*, ", "",equake$place)
equake$year = year(equake$time)
equake %>% filter(type=='earthquake') %>%  arrange(time)
## # A tibble: 60,114 x 24
##    time                latitude longitude  depth   mag magType   nst   gap
##    <dttm>                 <dbl>     <dbl>  <dbl> <dbl> <chr>   <int> <dbl>
##  1 2015-12-01 00:13:49     36.7    - 98.5   7.55  2.70 ml         NA 123  
##  2 2015-12-01 00:31:24     36.8    - 98.0   6.96  2.50 ml         NA  97.0
##  3 2015-12-01 00:46:31     36.7    - 98.5   7.92  3.20 ml         NA  98.0
##  4 2015-12-01 00:58:33     19.0    - 65.1  31.0   2.90 Md          7 259  
##  5 2015-12-01 00:59:20     60.8    - 29.0  10.0   4.30 mb         NA 114  
##  6 2015-12-01 01:10:30     36.4      70.8 102     4.00 mb         NA 112  
##  7 2015-12-01 01:29:18     20.0    -109    10.0   4.20 mb         NA 215  
##  8 2015-12-01 02:54:49     18.5    - 67.1  77.0   2.80 Md         11 209  
##  9 2015-12-01 02:58:24    -19.6    - 69.3 100     4.30 mb         NA  79.0
## 10 2015-12-01 03:04:03    -30.5    - 71.7  30.9   4.10 mwr        NA  82.0
## # ... with 60,104 more rows, and 16 more variables: dmin <dbl>, rms <dbl>,
## #   net <chr>, id <chr>, updated <dttm>, place <chr>, type <chr>,
## #   horizontalError <dbl>, depthError <dbl>, magError <dbl>, magNst <int>,
## #   status <chr>, locationSource <chr>, magSource <chr>, country <chr>,
## #   year <dbl>
equake
## # A tibble: 60,631 x 24
##    time                latitude longitude  depth   mag magType   nst   gap
##    <dttm>                 <dbl>     <dbl>  <dbl> <dbl> <chr>   <int> <dbl>
##  1 2015-12-01 00:13:49     36.7    - 98.5   7.55  2.70 ml         NA 123  
##  2 2015-12-01 00:31:24     36.8    - 98.0   6.96  2.50 ml         NA  97.0
##  3 2015-12-01 00:46:31     36.7    - 98.5   7.92  3.20 ml         NA  98.0
##  4 2015-12-01 00:58:33     19.0    - 65.1  31.0   2.90 Md          7 259  
##  5 2015-12-01 00:59:20     60.8    - 29.0  10.0   4.30 mb         NA 114  
##  6 2015-12-01 01:10:30     36.4      70.8 102     4.00 mb         NA 112  
##  7 2015-12-01 01:29:18     20.0    -109    10.0   4.20 mb         NA 215  
##  8 2015-12-01 02:54:49     18.5    - 67.1  77.0   2.80 Md         11 209  
##  9 2015-12-01 02:58:24    -19.6    - 69.3 100     4.30 mb         NA  79.0
## 10 2015-12-01 03:04:03    -30.5    - 71.7  30.9   4.10 mwr        NA  82.0
## # ... with 60,621 more rows, and 16 more variables: dmin <dbl>, rms <dbl>,
## #   net <chr>, id <chr>, updated <dttm>, place <chr>, type <chr>,
## #   horizontalError <dbl>, depthError <dbl>, magError <dbl>, magNst <int>,
## #   status <chr>, locationSource <chr>, magSource <chr>, country <chr>,
## #   year <dbl>

۸. گزاره " آیا شدت زلزله به عمق آن بستگی دارد" را تحقیق کنید؟ (طبیعتا از آزمون فرض باید استفاده کنید.)

I use Chi-squared Test of Independence for testing dependency of these 2 varibles. I first plot histogram of two variable too find optimal bining for each variable .

testd <- data.frame(mag =equake$mag,depth=equake$depth)
testd <- testd[complete.cases(testd),] 
hchist(equake$mag,name = "Magnitude")
hchist(equake$depth,name = "depth")

now change countinse variable to descrite one . I bin each variable from its mean to max to 5 bin . then I use test of independece .

testd$cat_mag<- cut(testd$mag,seq(min(testd$mag),max(testd$mag),5))
testd$cat_depth<- cut(testd$depth,seq(min(testd$depth),max(testd$depth),5))
tbl <- table(testd$cat_mag,testd$cat_depth)
chisq.test(as.matrix(tbl))
## 
##  Chi-squared test for given probabilities
## 
## data:  as.matrix(tbl)
## X-squared = 780880, df = 134, p-value < 2.2e-16

it seems that there is a dependence between these two varible . ***

۹. میانگین سالانه زلزله ها را بر حسب کشور به دست آورید. آیا میتوان دلیلی در تایید یا رد تئوری هارپ ارائه کرد.

I first find country name and year of each earthquek . then I find mean magnitute of earthquake of each year .

library(lubridate)
equake$country = sub(".*, ", "",equake$place)
equake$year = year(equake$time)
length(unique(equake$country))
## [1] 341
equake %>% group_by(country,year) %>% summarise(mean_mag=mean(mag))-> equake_mean

for testing whether mean of earthquake all around the world has been change I use anova test .
I use this data to solve this problem but I think that we have to use data of other years (before and after 1993 (initialization of haarp)) to determine this. butas it said in question 7 i use this .

library("ggpubr")
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:ggmap':
## 
##     inset
## The following object is masked from 'package:tidyr':
## 
##     extract
ggboxplot(equake_mean, x = "year", y = "mean_mag", 
          color = "year",
          ylab = "mean_magnit", xlab = "year")

# Compute the analysis of variance
res.aov <- aov(mean_mag ~ year, data = equake_mean)
# Summary of the analysis
summary(res.aov)
##              Df Sum Sq Mean Sq F value Pr(>F)
## year          1    0.1  0.0584   0.131  0.718
## Residuals   923  411.5  0.4459

by this p-value we can not say that there is a lot of diffrence in one year on another .

using diaster data for here I just analyse oposit country of america like iran and russia befor and after 1993 .

disaster %>% dplyr::select(YEAR,COUNTRY,INTENSITY) %>% group_by(COUNTRY,YEAR) %>% summarise(mean_mag=mean(INTENSITY,na.rm=T)) -> temp
temp <- temp[complete.cases(temp),]
temp %>% mutate(is_after1993=YEAR>1993) -> temp
op_country <- c('RUSSIA','IRAN')
iran <-temp %>% filter(COUNTRY=='IRAN')
ras <- temp %>% filter(COUNTRY=='RUSSIA')

iran

library("ggpubr")
ggboxplot(temp, x = "is_after1993", y = "mean_mag", 
          color = "is_after1993",
          ylab = "mean_magnit", xlab = "year")

iran$is_after1993 = as.factor(iran$is_after1993)
iran %>%t.test(mean_mag ~ is_after1993 ,data=.,, alternative = 'greater')
## 
##  Welch Two Sample t-test
## 
## data:  mean_mag by is_after1993
## t = 1.2717, df = 4.0916, p-value = 0.1355
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -1.04875      Inf
## sample estimates:
## mean in group FALSE  mean in group TRUE 
##            8.042328            6.466667

by this p-value we can say intensity decreases even :))) so how harp can be true ?


۱۰. سه حقیقت جالب در مورد زلزله بیابید.

1- NUMBER AND MAGNITUDE OF EARTHQUAKES AROUND THE WORLD: by this I will find magnitute of earthqueks around the worl

map <- ggplot(equake) + borders("world", colour="black", fill="gray50")  
print(map + geom_point(aes(x=equake$longitude, y=equake$latitude,color=equake$mag),shape=18) +
        scale_color_gradient(low="blue", high="red") +
        theme(legend.position = "top")+
        ggtitle("earthquakes by mag"))

2- DISTRIBUTION OF THE MAGNITUDE ACROSS EARTHQUAKES:

ggplot(equake,aes(mag))+
  geom_area(aes(y = ..count..,fill="blue"), stat = "bin")+
  labs(title="Earthquakes",caption="jhervas") + 
  guides(fill=FALSE)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

it seems that the peak on distribution of wold earthqueke is on 4.5 or below 1 rishter .

3- change of mag type between years :)

sum_magn_type <- equake%>% 
                 dplyr::select(year, magType) %>% 
                group_by(year,magType) %>% 
                summarize(count = n())

ggplot(sum_magn_type, aes(x =  year, y = count, fill = magType, 
                              colour =magType, alpha = 1.5))  + 
           geom_point()  + geom_line()